#!/usr/bin/python3
# ******************************************************************************
# Copyright (c) Huawei Technologies Co., Ltd. 2021-2021. All rights reserved.
# licensed under the Mulan PSL v2.
# You can use this software according to the terms and conditions of the Mulan PSL v2.
# You may obtain a copy of Mulan PSL v2 at:
#     http://license.coscl.org.cn/MulanPSL2
# THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR
# PURPOSE.
# See the Mulan PSL v2 for more details.
# ******************************************************************************/
import json
from json import JSONDecodeError
import re
from collections import namedtuple
from functools import wraps

from bs4 import BeautifulSoup

from constant import Constant
from exception import RequestError
from logger import logger
from request import http
from conf import settings


def match(regexes, component):
    """
    Decorator factory that collects regex hits into one field of patch info
    before calling the wrapped function.

    The search is skipped when the patch info already holds a non-empty
    "commits" or "pr" entry; the wrapped function is called either way.

    :param regexes: iterable of regex patterns to search the text with
    :param component: patch info key to extend (commits, pr, issue)
    :return: decorator for functions taking (crawler_text, patch_info)
    """

    def _inner_decorator(func):
        @wraps(func)
        def _inner_match(crawler_text, patch_info):
            already_found = patch_info.get("commits") or patch_info.get("pr")
            if not already_found:
                # Gather the hits of every pattern, in pattern order.
                hits = [
                    hit
                    for regex in regexes
                    for hit in re.findall(pattern=regex, string=crawler_text)
                ]
                try:
                    # De-duplicate before appending to the target field.
                    patch_info[component].extend(list(set(hits)))
                except KeyError:
                    logger.error(f"Patch info does not exist in field {component}")
            return func(crawler_text, patch_info)

        return _inner_match

    return _inner_decorator


class CvePlatform:
    """
    The parent class of cve reference URL

    An instance is configured by a platform rule mapping (request method,
    optional request body, optional list of redirect rules), a cve number and
    a URL template. It fetches the reference page and extracts commit, pr and
    issue links from the returned content.
    """

    # Record type for the crawl result. Defined once on the class instead of
    # being rebuilt for every instance; still reachable as ``self._Patch``.
    _Patch = namedtuple("Patch", ["platform", "commits", "pr", "issue"])

    def __init__(
        self,
        platform,
        cve_num=None,
        base_url=None,
        format_text="text",
    ):
        """
        :param platform: mapping with the crawl rule of the platform; the keys
            read by this class are "method", "body" and "redirect"
        :param cve_num: cve number substituted into base_url
        :param base_url: URL template containing a ``{cve_num}`` placeholder
        :param format_text: "text" to treat the response as a web page, any
            other value to treat it as JSON
        """
        self._platform = platform
        self.cve_num = cve_num
        self.base_url = base_url
        self._format = format_text

    @property
    def crawler_url(self):
        """
        Reference URL where cve information is located
        :return: base_url with the cve number filled in
        """
        return self.base_url.format(cve_num=self.cve_num)

    @property
    def patch_info(self):
        """
        Structure of cve patch information
        :return: new instance of _Patch with empty commits, pr and issue lists
        """
        return self._Patch(platform=self.crawler_url, commits=[], pr=[], issue=[])

    async def _rule_redirect(self, response):
        """
        Page multi layer jump data parsing

        Each redirect rule is applied to the response of the previous step:
        the rule's regex is searched in the formatted page and the match is
        appended to the rule's prefix to build the next URL.
        :param response: http response data
        :return: response data of the last redirect
        """
        for redirect_rule in self._platform.get("redirect", []):
            page_text = self.format_text(response.text) or ""
            candidates = re.findall(
                pattern=redirect_rule.get("regex", ""), string=page_text
            )
            url = redirect_rule["prefix"]
            if candidates:
                # Take the last match in document order. Picking the last
                # element of a set would depend on string hash randomization
                # and could select a different URL from one run to the next.
                url += candidates[-1]
            response = await self._method(redirect_rule)(
                url, data=redirect_rule.get("body")
            )

        return response

    @staticmethod
    def _method(rule):
        """
        Select the request function for a rule
        :param rule: mapping with an optional "method" key
        :return: http.get when the method is "get" (case-insensitive, also
            the default), otherwise http.post
        """
        # Lower-case the verb so that "GET" is not silently sent as a POST.
        verb = str(rule.get("method", "get")).lower()
        return http.get if verb == "get" else http.post

    async def crawling_patch(self):
        """
        Crawl patch information from the cve reference website
        :return: patch info dict, or None when the request fails, the
            response is empty or its content cannot be formatted
        """
        try:
            _response = await self._method(self._platform)(
                self.crawler_url, data=self._platform.get("body")
            )
            if "redirect" in self._platform:
                _response = await self._rule_redirect(response=_response)
        except RequestError as error:
            # Record the failure instead of dropping it silently.
            logger.error(f"Failed to request URL {self.crawler_url}, detail: {error}")
            return None

        if _response.error or not _response.text:
            logger.error(
                f"Failed to access URL {self.crawler_url}, detail: {_response.error}"
            )
            return None

        formatted_text = (
            self.format_text(_response.text)
            if self._format == "text"
            else self.json(_response.text)
        )
        if formatted_text is None:
            logger.error(f"Failed to format string {_response.text}")
            return None

        patch_info_dict = self.match_patch(
            formatted_text, dict(self.patch_info._asdict())
        )
        return patch_info_dict

    @staticmethod
    @match(settings.get_regex(label="commit"), "commits")
    @match(settings.get_regex(label="pr"), "pr")
    @match(settings.get_regex(label="issue"), "issue")
    def match_patch(text, patch_info):
        """
        Matching patch related links,Use decorator to find

        The decorators run from top to bottom (commit, pr, issue) and fill
        patch_info before this body runs.
        :param text: Crawled content
        :param patch_info: dict form of self.patch_info
        :return: patch_info with duplicates removed from its list fields
        """
        # Matching url de-duplication
        for key, value in patch_info.items():
            if isinstance(value, list):
                patch_info[key] = list(set(value))

        logger.info(f"Find patch: {patch_info}")
        return patch_info

    @staticmethod
    def format_text(text):
        """
        Format the content obtained from the URL
        :param text: raw content of web page
        :return: prettified HTML text, or None when text is empty
        """
        if not text:
            return None
        html_text = BeautifulSoup(text, "html.parser")
        return html_text.prettify()

    @staticmethod
    def json(text):
        """
        Format content that is expected to be JSON
        :param text: content of web page
        :return: content re-serialized with an indent of 4, or None when the
            content is not valid JSON
        """
        try:
            text_dict = json.loads(text)
            return json.dumps(text_dict, indent=4)
        except JSONDecodeError as e:
            logger.error(
                f"The format of the content obtained by bugzilla website is incorrect, "
                f"content is {text}, message is {e.msg}"
            )
            return None
